import pandas as pd
import os

# Directory holding the raw input files read by the loaders in this module.
# The path is relative, so it is resolved against the current working directory.
data_path = '../raw_data/music_data'

def music_data(nrows=None):
    """Load the music metadata table (dataset 1).

    Reads the ``music_meta`` file located under ``data_path``. The file is
    parsed without a header row, using the SOH (0x01) control character as the
    field separator.

    Args:
        nrows: Passed straight to ``pandas.read_csv``; ``None`` applies no
            row limit.

    Returns:
        A DataFrame with the columns ``item_id``, ``total_timelen`` and
        ``tags``, in which every missing value has been replaced by ``'-'``.
    """
    columns = ['item_id', 'desc', 'total_timelen', 'tags']
    meta_file = os.path.join(data_path, 'music_meta')
    raw = pd.read_csv(meta_file, sep='\001', names=columns, nrows=nrows)
    # Clean up NaN values by substituting a '-' placeholder, then discard the
    # 'desc' column, which is not part of the returned table.
    return raw.fillna('-').drop('desc', axis=1)

def user_profile(nrows=None):
    """Load the user metadata table (dataset 2).

    Reads the comma-separated ``user_profile.data`` file located under
    ``data_path``. The file is parsed without a header row.

    Args:
        nrows: Passed straight to ``pandas.read_csv``; ``None`` applies no
            row limit.

    Returns:
        A DataFrame with the columns ``user_id``, ``gender``, ``age``,
        ``salary`` and ``province``.
    """
    columns = ['user_id', 'gender', 'age', 'salary', 'province']
    profile_file = os.path.join(data_path, 'user_profile.data')
    return pd.read_csv(profile_file, sep=',', names=columns, nrows=nrows)

def user_watch(nrows=None):
    """Load the users' music listening records (dataset 3).

    Reads the ``user_watch_pref.sml`` file located under ``data_path``. The
    file is parsed without a header row, using the SOH (0x01) control
    character as the field separator.

    Args:
        nrows: Passed straight to ``pandas.read_csv``; ``None`` applies no
            row limit.

    Returns:
        A DataFrame with the columns ``user_id``, ``item_id``,
        ``stay_seconds`` and ``hour``.
    """
    columns = ['user_id', 'item_id', 'stay_seconds', 'hour']
    watch_file = os.path.join(data_path, 'user_watch_pref.sml')
    return pd.read_csv(watch_file, sep='\001', names=columns, nrows=nrows)

if __name__ == '__main__':
    # Manual smoke check: load the first 20 music-metadata rows and print a
    # preview. Call user_profile() or user_watch() here instead to inspect the
    # other tables.
    preview = music_data(20)
    print(preview.head())